/*==================================================
Project:       Targeting Social Programs
Authors:       Diether W. Beuermann
               Bridget Hoffmann        
               Marco Stampini 
               David L. Vargas
               Diego Vera-Cossio
----------------------------------------------------
Creation Date:    Jul 2024
Modification Date:   
Do-file version:    01
References:          
Output:             
==================================================*/

/*==================================================
              0: Program set up
==================================================*/
*Written on STATA 17
drop _all
 
** source dir
cd "${dir4r}" // graph dir

/*==================================================
             1: load and overall prep
==================================================*/

*----------- 1.0.1 Load CRRA welfare FUN
qui do "${dir6r}/ados/bstrap_pmt.ado" // load bstap FUN
qui do "${dir6r}/ados/bstrap_pmt_mh.ado" // load bstap FUN
qui do "${dir6r}/ados/bstrap_pmt_mh4.ado" // load bstap FUN
qui do "${dir6r}/ados/baseline_pmt.ado" // load baseline FUN
qui do "${dir6r}/ados/expanded_pmt.ado" // load expanded FUN

*---------- 1.2: Relevant macros
// All stat variables in survey (source SISBEN, we just restrict the sample)
glo t2_1 age_feb20 no_edu elementary highschool tertiary_edu works_0 ///
formal_work n_members urban pp_inc
// Shares assets with sisben 
glo t2_1b age_feb20 no_edu elementary highschool tertiary_edu works_0 ///
formal_work n_members urban pp_inc
// GEIH variables
glo t2_2 age_head no_edu elementary highschool tertiary_edu ///
works_0 formal_work n_members urban pp_inc


/*==================================================
            1: Survey 
==================================================*/

forvalues y = 2019(1)2021 {

    *----------  1.1. Data prep:
    use "${dir3r}/01_survey/survey_targeting_r1_long.dta", clear

     ** option 1: Winsorise 

    _pctile income_pp_nt if year == 2021, n(100)

    gen _aux = income_pp_nt if income_pp_nt <= r(r95) & year == 2021
    bys id_vivienda_SISBEN: egen income_pp_nt_w = max(_aux)
    drop _aux

    keep if !missing(income_pp_nt_w) // we keep the families within the 88th percentile 

    sum income_pp_nt if year == 2021, det
    sca sample_l = r(min)
    sca sample_u = r(max)

    keep if year == `y'

    keep work_any_srv formal_work_srv informal_work_srv income_pp_nt pension pondera

    glo covs work_any_srv formal_work_srv informal_work_srv income_pp_nt pension

    // estimation 
    loc j = 0
    loc rowname ""				// An empty local to keep track of var/names
    loc hline ""
    foreach v of global covs { 	
        loc ++j

        // regressions
        sum `v' [aw=pondera]
        scalar v_mean = r(mean)
        scalar v_min = r(min)
        scalar v_max = r(max)
        scalar v_sd = r(sd)
        scalar v_N = r(N)


        // store data in matrix
        *matrix output = J(1,4,.)
        *matrix output = J(1,2,.)
        matrix output = J(1,1,.)
        matrix output[1,1] = v_mean
        *matrix output[1,2] = v_sd
        *matrix output[1,3] = v_min
        *matrix output[1,4] = v_max

        if (`j' == 1) 		mat stat_srv_`y' = output
        else 				mat stat_srv_`y' = stat_srv_`y'\output

        loc rowname "`rowname' `v'"
        *loc hline = "`hline'001"
        loc hline = "`hline'01"
    }

    matrix rownames stat_srv_`y' = `rowname'
    mat stat_srv_t_`y' = stat_srv_`y''

    /*==================================================
                2: GEIH  
    ==================================================*/

    ** loop load over GEIH surveys and get relevant values into a matrix

    use "${dir3r}/03_auxiliar/GEIH_`y'11_foruse.dta", replace 

    *replace pp_inc_nt = pp_inc_nt * 1000
    *replace pp_inc = pp_inc * 1000

    replace formal_work = 0 if works == 0 & formal_work == . // consistent with survey

    gen informal_work = works == 1 & formal_work == 0 
    replace informal_work = . if formal_work == .
        
    keep works formal_work informal_work pp_inc pp_inc_nt pension_fund FEX_C18 n_members
    gen l_pp_inc_geih = log(1000+pp_inc_nt)
    glo covs_g works formal_work informal_work pp_inc_nt pension_fund

    
    forv i = 1/3 {
        preserve

        if `i' == 2       keep if inrange(pp_inc_nt, sample_l, sample_u)
        if `i' == 3       keep if pp_inc_nt > sample_u

        // estimation 
        loc j = 0
        loc rowname ""				// An empty local to keep track of var/names
        loc hline ""
        foreach v of global covs_g { 	
            loc ++j

            // regressions
            *reghdfe `v' [aw=fex_c_2011], a(DPTO) vce(robust)
            *scalar v_mean = _b[_cons]

            sum `v' [aw=FEX_C18]
            scalar v_mean = r(mean)
            scalar v_min = r(min)
            scalar v_max = r(max)
            scalar v_sd = r(sd)
            scalar v_N = r(N)


            // store data in matrix
            *matrix output = J(1,4,.)
            *matrix output = J(1,2,.)
            matrix output = J(1,1,.)
            matrix output[1,1] = v_mean
            *matrix output[1,2] = v_sd
            *matrix output[1,3] = v_min
            *matrix output[1,4] = v_max

            if (`j' == 1) 		mat stat_geih_`i'_`y' = output
            else 				mat stat_geih_`i'_`y' = stat_geih_`i'_`y'\output

            loc rowname "`rowname' `v'"
            *loc hline = "`hline'001"
            loc hline = "`hline'01"
        }

        matrix rownames stat_geih_`i'_`y' = `rowname'
        mat stat_geih_`i'_t_`y' = stat_geih_`i'_`y''
    restore
    }

}

/*==================================================
            4: Single table to export
==================================================*/

mat estmat_2019 = stat_srv_2019 , stat_geih_2_2019 , stat_geih_3_2019 , stat_geih_1_2019
mat estmat_2020 = stat_srv_2020 , stat_geih_2_2020 , stat_geih_3_2020 , stat_geih_1_2020
mat estmat_2021 = stat_srv_2021 , stat_geih_2_2021 , stat_geih_3_2021 , stat_geih_1_2021

// export table with smart looks
#delimit ;
    frmttable using TA2_GEIH, statmat(estmat_2019) ctitles( "" , "" , "2019", "" \ "", "Survey", "GEIH comparable", "GEIH non-comparable" , "GEIH Full") 
    vlines(01) vlstyle(a) basefont(plain  fs11) coljust(c) 
    rtitles( "Paid Work" \ "Formal Work" \ "Informal work"  \ "PP income" \ "Pension Fund") 
    replace 
    ;
    frmttable using TA2_GEIH, statmat(estmat_2020) ctitles( "" , "" , "2020", "" \ "", "Survey", "GEIH comparable", "GEIH non-comparable" , "GEIH Full") 
    vlines(01) vlstyle(a) basefont(plain  fs11) coljust(c) 
    rtitles( "Paid Work" \ "Formal Work" \ "Informal work"  \ "PP income" \ "Pension Fund") 
    addtable 
    ;
    frmttable using TA2_GEIH, statmat(estmat_2021) ctitles( "" , "" , "2021", "" \ "", "Survey", "GEIH comparable", "GEIH non-comparable" , "GEIH Full")  
    vlines(01) vlstyle(a) basefont(plain  fs11) coljust(c) 
    rtitles( "Paid Work" \ "Formal Work" \ "Informal work"  \ "PP income" \ "Pension Fund") 
    addtable 
    ;
#delimit cr

#delimit ;
    frmttable using TA2_GEIH, statmat(estmat_2019) ctitles( "" , "" , "2019", "" \ "", "Survey", "GEIH comparable", "GEIH non-comparable" , "GEIH Full") 
    vlines(01)coljust(c) 
    rtitles( "Paid Work" \ "Formal Work" \ "Informal work"  \ "PP income" \ "Pension Fund") 
    replace tex frag
    ;
    frmttable using TA2_GEIH, statmat(estmat_2020) ctitles( "" , "" , "2020", "" \ "", "Survey", "GEIH comparable", "GEIH non-comparable" , "GEIH Full") 
    vlines(01)  coljust(c) 
    rtitles( "Paid Work" \ "Formal Work" \ "Informal work"  \ "PP income" \ "Pension Fund") 
    addtable tex frag
    ;
    frmttable using TA2_GEIH, statmat(estmat_2021) ctitles( "" , "" , "2021", "" \ "", "Survey", "GEIH comparable", "GEIH non-comparable" , "GEIH Full")  
    vlines(01)  coljust(c) 
    rtitles( "Paid Work" \ "Formal Work" \ "Informal work"  \ "PP income" \ "Pension Fund") 
    addtable tex frag
    ;
#delimit cr
